##### ####################################################
#####                                               ######
#####               Graphical analysis              ######
#####                                               ######
##### ####################################################

# init ------------------------------------------------------------

# Start from an empty workspace and fix the random seed.
# NOTE(review): rm() removes every object in the current environment, which is
# destructive when this script is source()d from a session that holds other work.
rm(list = ls())
set.seed(seed = 221186)

# Load libraries

library(zoo) # 1.8.4
library(data.table) # 1.11.4
library(scales) # 1.0.0
library(ggplot2) # 3.1.0
library(igraph) # 1.2.2

# Load data
# Restores the objects saved in the file into the workspace; the code below
# expects a data.table called `speeches` to be among them.

load(file = "data/speeches.Rdata")

# Sparkplot ------------------------------------------------------------ 

# Departments with no "F" minister_gender row among the rows where a minister
# was present; these are excluded from the spark plot data.
remove_ministries <- speeches[
  minister_present == TRUE,
  sum(minister_gender == "F"),
  by = debate_department
][V1 == 0]$debate_department

# One row per (month, department): share of words / speeches by women among the
# selected rows, and the same shares divided by `prop_women`.
out <- speeches[
  minister_in_debate == FALSE & is_speaker == FALSE &
    !(debate_department %in% remove_ministries) & minister_present == TRUE,
  {
    # Gender can be NA. Indexing with an NA logical yields NA elements, so
    # length(body[Gender == "F"]) would count those rows as female speeches.
    # Use an explicit NA-safe indicator for both the word and speech shares.
    is_female <- !is.na(Gender) & Gender == "F"
    share_words <- sum(word_count[is_female], na.rm = TRUE) / sum(word_count)
    share_speeches <- sum(is_female) / .N
    # `yearmon` is already a grouping column and `minister.name` is listed
    # once, so the result has no duplicated column names.
    list(
      prop.women.words = share_words,
      prop.women.speeches = share_speeches,
      ratio.women.words = share_words / unique(prop_women),
      ratio.women.speeches = share_speeches / unique(prop_women),
      minister.gender = unique(minister_gender),
      minister.name = unique(minister_name),
      ministry = unique(debate_department),
      date = unique(hdate)[1]
    )
  },
  by = list(yearmon, debate_department)
]

out$ministry <- as.factor(out$ministry)

setkey(out, ministry, date)

# Number consecutive runs of the same minister name in the sorted table; the
# plot uses this as the line-grouping variable so a line breaks whenever the
# minister changes. rep() always returns a plain vector, whereas
# unlist(sapply(...)) returns a matrix when all runs have the same length.
minister_runs <- rle(as.numeric(as.factor(out$minister.name)))
out$episode <- rep(seq_along(minister_runs$lengths), minister_runs$lengths)
out[, id := seq_len(.N), by = ministry]

# Order the facets by the share of rows with a female minister (largest share
# first, because `order` stores one minus that share).
out[, order := 1 - (sum(minister.gender == "F") / length(minister.gender)), by = ministry]
out$ministry <- factor(out$ministry, levels = unique(out$ministry[order(out$order)]))

male.col <- "gray"
female.col <- "black"
background.col <- "#F3F4E8"

# Keep only the first column for any repeated column name (a no-op when the
# names are already unique).
out <- out[, !duplicated(names(out)), with = FALSE]

# Fixed reference dates drawn as dotted vertical lines in every facet.
spark_vlines <- as.Date(c(
  "1997-05-05", "2001-05-05", "2005-05-05",
  "2010-05-05", "2015-05-05", "2016-07-13"
))

# One small time series per ministry: share of words spoken by women, with the
# line colour showing the gender of the minister during each episode.
spark <- ggplot(out, aes(x = date, y = prop.women.words, col = minister.gender, group = episode)) +
  geom_line(size = .75) +
  facet_grid(ministry ~ ., switch = "y") +
  theme(
    strip.text.y = element_text(angle = 180),
    plot.background = element_rect(fill = "transparent", colour = NA),
    panel.border = element_rect(colour = "black", fill = NA, size = 0.1),
    panel.background = element_blank(),
    legend.key = element_rect(fill = "transparent", colour = NA),
    legend.background = element_rect(fill = "transparent", colour = NA),
    legend.position = "bottom",
    panel.grid = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    text = element_text(size = 18)
  ) +
  scale_y_continuous(breaks = NULL, limits = range(out$prop.women.words)) +
  # Colours are matched to the data values by name and the legend order is set
  # with `breaks`, so the mapping no longer depends on the implicit level order
  # of minister.gender (alphabetical "F", "M" for a character column, which
  # would give female ministers the male colour and label).
  # NOTE(review): assumes the male code is "M" — confirm against the data.
  scale_color_manual(
    values = c(M = male.col, F = female.col),
    breaks = c("M", "F"),
    labels = c("Male minister", "Female minister"),
    name = ""
  ) +
  ylab("") +
  xlab("") +
  geom_vline(xintercept = as.numeric(spark_vlines), linetype = "dotted", size = 0.5)

print(spark)
# Pass the plot explicitly instead of relying on ggplot2's "last plot" state.
ggsave("plots/spark_plot_new_trans.png", plot = spark, bg = "transparent", units = "in", width = 8, height = 10)


## Female representation over time ------------------------------------------

setkey(speeches, hdate, section_id, subsection_id, hpos)

# Missing front-bench flags are treated as FALSE so the filter below keeps
# those rows instead of dropping them on an NA comparison.
speeches$is_opposition_minister[is.na(speeches$is_opposition_minister)] <- FALSE
speeches$is_minister[is.na(speeches$is_minister)] <- FALSE

# Positions matching any of these terms (case-insensitive) are excluded.
excluded_positions <- "minister|shadow|Spokesperson|secretary|leader|whip|pps|speaker|chairman|Chancellor"

# One row per (subsection, department): shares of words / speeches by women
# (and words by men) among the selected rows, divided by `prop_women`
# (or 1 - `prop_women` for men). `model_weights` is the number of rows used.
out <- speeches[
  is_minister == FALSE & is_speaker == FALSE & is_opposition_minister == FALSE &
    minister_present == TRUE &
    !grepl(excluded_positions, position, ignore.case = TRUE),
  {
    # Gender can be NA. Indexing with an NA logical yields NA elements, so
    # length(body[Gender == "F"]) would count those rows as female speeches.
    # Use explicit NA-safe indicators instead.
    is_female <- !is.na(Gender) & Gender == "F"
    is_male <- !is.na(Gender) & Gender == "M"
    share_words_f <- sum(word_count[is_female], na.rm = TRUE) / sum(word_count)
    share_words_m <- sum(word_count[is_male], na.rm = TRUE) / sum(word_count)
    share_speeches_f <- sum(is_female) / .N
    list(
      prop.women.words = share_words_f,
      prop.women.speeches = share_speeches_f,
      ratio.women.words = share_words_f / unique(prop_women),
      ratio.men.words = share_words_m / (1 - unique(prop_women)),
      ratio.women.speeches = share_speeches_f / unique(prop_women),
      minister.gender = unique(minister_gender),
      minister.name = unique(minister_name),
      ministry = unique(debate_department),
      yearmon = unique(yearmon),
      model_weights = .N
    )
  },
  by = list(subsection_id, debate_department)
]

# Mean of the female word ratio with its t-based 95% confidence interval,
# formatted as "mean [lower, upper]".
ratio_ci <- as.numeric(t.test(out$ratio.women.words)$conf.int)
mean_out <- paste0(
  round(mean(out$ratio.women.words), 2),
  " [", round(ratio_ci[1], 2), ", ", round(ratio_ci[2], 2), "]"
)

# Median with an order-statistic interval: the bounds are the sorted values at
# the 2.5% and 97.5% quantiles of a Binomial(n, 0.5) distribution.
x <- out$ratio.women.words
median_ci <- sort(x)[qbinom(c(.025, .975), length(x), 0.5)]
median_out <- paste0(
  round(median(x), 2),
  " [", round(median_ci[1], 2), ", ", round(median_ci[2], 2), "]"
)

# Write each formatted string (no trailing newline) to its own .tex snippet.
cat(mean_out, file = "latex/tables/usefulNumbers/mean_female_speech_ratio.tex")
cat(median_out, file = "latex/tables/usefulNumbers/median_female_speech_ratio.tex")

# Monthly median and mean of the per-debate female speech ratio.
median_and_mean_ratio <- out[, list(
  median = median(ratio.women.speeches),
  mean = mean(ratio.women.speeches)
), by = yearmon]
median_and_mean_ratio[, yearmon := as.numeric(yearmon)]

# Loess smooths of both monthly series over time, with standard errors.
median_loess <- loess(median ~ yearmon, data = median_and_mean_ratio)
mean_loess <- loess(mean ~ yearmon, data = median_and_mean_ratio)

median_predict <- predict(median_loess, newdata = median_and_mean_ratio, se = TRUE)
mean_predict <- predict(mean_loess, newdata = median_and_mean_ratio, se = TRUE)

# Point estimates with a band of +/- 1.96 standard errors.
median_pe <- median_predict$fit
median_hi <- median_pe + 1.96 * median_predict$se.fit
median_lo <- median_pe - 1.96 * median_predict$se.fit

mean_pe <- mean_predict$fit
mean_hi <- mean_pe + 1.96 * mean_predict$se.fit
mean_lo <- mean_pe - 1.96 * mean_predict$se.fit

# Only the median smooth is drawn; the mean smooth is computed above but not
# plotted.
pdf("plots/mean_median_speech_ratio.pdf", 12, 8)
par(cex = 1.4, lwd = 2)
plot(
  median_and_mean_ratio$yearmon, median_pe,
  type = "l", ylim = c(0, 1.5), lty = 1, main = "",
  ylab = "Female speech ratio", xlab = "", xaxt = "n", bty = "n"
)
# Upper and lower edges of the band, then the shaded area between them.
for (band_edge in list(median_hi, median_lo)) {
  lines(median_and_mean_ratio$yearmon, band_edge, lty = 1)
}

polygon(
  x = c(median_and_mean_ratio$yearmon, rev(median_and_mean_ratio$yearmon)),
  y = c(median_hi, rev(median_lo)),
  col = adjustcolor(alpha("black", .2)), border = NA
)

# A ratio of 1 is marked with a dotted reference line.
abline(h = 1, lty = 3, lwd = 3)

axis(
  1,
  at = as.numeric(as.yearmon(c("1997-05-05", "2001-05-05", "2005-05-05", "2010-05-05", "2015-05-05"))),
  labels = c("1997", "2001", "2005", "2010", "2015")
)

legend("topright", legend = c("Median speech ratio"), lty = c(1), col = "black", bty = "n")
dev.off()

## Ratio by ministry plot ------------------------------------------

# Per-ministry summary of the female word ratio: mean with a t-based 95%
# interval, and median with an order-statistic interval (sorted values at the
# 2.5% / 97.5% quantiles of a Binomial(n, 0.5) distribution).
ratios <- out[, {
  t_interval <- as.numeric(t.test(ratio.women.words, mu = 1)$conf.int)
  sorted_ratio <- sort(ratio.women.words)
  n_ratio <- length(ratio.women.words)
  list(
    mean = mean(ratio.women.words),
    lower = t_interval[1],
    upper = t_interval[2],
    median = median(ratio.women.words),
    lower.median = sorted_ratio[qbinom(c(.025), n_ratio, 0.5)],
    upper.median = sorted_ratio[qbinom(c(.975), n_ratio, 0.5)]
  )
}, by = ministry]

male.col <- "#E69F00"
female.col <- "#0072B2"
background.col <- "transparent"

# Point-range plot of the mean female word ratio per ministry (with its
# interval), ministries sorted by mean, and a dashed reference line at 1.
png("plots/ratio_by_ministry_very_simple.png", 1200, 750)
ratios$ministry <- reorder(ratios$ministry, ratios$mean)
ratio_plot <- ggplot(ratios, aes(ministry, mean, ymin = lower, ymax = upper)) +
  theme(
    panel.background = element_rect(fill = background.col),
    plot.background = element_rect(fill = background.col),
    panel.grid.major.y = element_line(colour = "gray", linetype = 2, size = 0.35),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    text = element_text(size = 20)
  ) +
  scale_y_continuous(breaks = c(0.5, 1, 1.5, 2)) +
  xlab("") +
  ylab("Ratio") +
  geom_pointrange(size = 1.2) +
  geom_point(size = 5) +
  coord_flip() +
  geom_hline(yintercept = 1, size = 1, linetype = 2)
# A ggplot object is only drawn when printed. Top-level auto-printing does not
# happen under source(), which would leave the PNG empty, so print explicitly
# (as the other plots in this script do).
print(ratio_plot)
dev.off()

### ################################################
### Ministers over time
### ################################################

# Restores the saved objects; a data.table called `ministers` is expected.
load(file = "data/ministers.Rdata")

# Keep only spells that are attached to a department.
ministers <- ministers[!is.na(ministers$department), ]

# Length of each ministerial spell.
ministers[, time_held := EndDate - StartDate]

# Total time in post per department, and the part of it held by women.
most_female <- ministers[, list(
  women_held = sum(as.numeric(time_held[Gender == "F"])),
  total_time = sum(as.numeric(time_held))
), by = department]

# Sorting by time held by women (ties broken by total time) fixes the order in
# which departments appear on the y axis.
setkey(most_female, women_held, total_time)

ministers$department <- factor(ministers$department, levels = most_female$department)

male.col <- "gray"
female.col <- "black"
background.col <- "#F3F4E8"

# Explicit level order so the colours and labels below line up with M, F.
ministers$Gender <- factor(ministers$Gender, levels = c("M", "F"))

# One horizontal segment per spell, coloured by the minister's gender, with
# dashed vertical lines at fixed reference dates.
p <- ggplot(
  ministers,
  aes(x = StartDate, xend = EndDate, y = department, yend = department, col = Gender)
) +
  geom_segment(size = 2) +
  xlim(c(min(ministers$StartDate), sort(unique(ministers$EndDate), decreasing = TRUE)[1])) +
  theme(
    plot.background = element_rect(fill = "transparent", colour = NA),
    panel.border = element_rect(colour = "black", fill = NA, size = 0.01),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.background = element_rect(fill = "transparent", colour = NA),
    legend.key = element_rect(fill = "transparent", colour = NA),
    text = element_text(size = 20),
    legend.position = "bottom"
  ) +
  scale_colour_manual("", values = c(male.col, female.col), labels = c("Male minister", "Female minister")) +
  geom_vline(
    xintercept = as.numeric(as.Date(c("1997-05-05", "2001-05-05", "2005-05-05", "2010-05-05", "2015-05-05", "2016-07-13"))),
    linetype = 2
  ) +
  ylab("") +
  xlab("")


png("plots/ministers_over_time.png", 1500, 1300, res = 140)
print(p)
dev.off()


### ################################################
### Example influence plot
### ################################################

# Restores the saved objects; a list called `influence_example_data` is
# expected, holding the similarity matrix, the speech table and a row selector.
load("data/influence_example_data.Rdata")

directed.cosine <- influence_example_data$directed.cosine
out <- influence_example_data$out
speech.subset <- influence_example_data$speech.subset

# Similarities below the threshold are set to zero, i.e. treated as no edge.
threshold <- 0.25

directed.cosine[directed.cosine < threshold] <- 0
# Label each speaker with the second space-separated token of their name.
# vapply() guarantees a character vector with one label per speaker.
mp.names <- vapply(
  strsplit(out[speech.subset]$name, " "),
  function(name_parts) name_parts[[2]],
  character(1)
)
rownames(directed.cosine) <- mp.names
colnames(directed.cosine) <- mp.names

# Create the graph - speakers are nodes, edges are the similarity between speeches. Weights provided by the cosine similarity (adjacency) matrix
mygraph.directed <- graph_from_adjacency_matrix(directed.cosine, diag = FALSE, mode = "directed", weighted = TRUE)
mygraph.undirected <- graph_from_adjacency_matrix(directed.cosine, diag = FALSE, mode = "undirected", weighted = TRUE)

# Centrality scores. The underscore names replace the deprecated dotted
# aliases (page.rank, hub.score, authority.score) and compute the same values.
page.rank.vec <- page_rank(mygraph.directed, directed = TRUE)$vector
hub.vec <- hub_score(mygraph.directed)$vector
auth.vec <- authority_score(mygraph.directed)$vector

# Calculate the eigenvector centrality scores
eigen.vec <- eigen_centrality(mygraph.undirected, directed = FALSE)$vector

# Reverse the column order and transpose: rows of `for.plotting` are the
# speakers in reverse order, columns are the speakers in original order.
for.plotting <- t(directed.cosine[, rev(seq_len(ncol(directed.cosine)))])

male.col <- "black"
female.col <- "#0072B2"
background.col <- "#F3F4E8"

n.rows <- nrow(for.plotting)
n.cols <- ncol(for.plotting)

# Cell centres on both axes. Both are loop-invariant and are needed again
# after the loop, so they are computed once here.
mid.points.x <- seq_len(n.cols) - 0.5
mid.points.y <- seq_len(n.rows) - 0.5

png("plots/debate_centrality_example.png", 1150, 1000)
par(xpd = FALSE, mar = c(1, 2, 2, 7), mfrow = c(1, 1), oma = c(4, 4, 0, 0), cex = 2.5, bg = "transparent")
size.factor <- 1
xlims <- range(mid.points.x)

# Empty canvas with one unit cell per matrix entry, labelled axes and a dotted
# grid on the cell boundaries.
plot(0, 0, col = "white", ylim = c(0, n.rows), xlim = xlims, axes = FALSE, xlab = "", ylab = "", cex.lab = 1 * size.factor)
axis(1, at = mid.points.x, labels = colnames(for.plotting), las = 2, lwd = 0, cex.axis = 1 * size.factor)
axis(1, at = 0:n.cols, labels = NA, las = 2)
axis(2, at = mid.points.y, labels = rownames(for.plotting), las = 1, lwd = 0.001, cex.axis = 1 * size.factor)
axis(2, at = 0:n.rows, labels = NA, las = 1)
abline(h = 0:n.rows, lty = 3, col = "gray", lwd = 5.5 * size.factor)
abline(v = 0:n.cols, lty = 3, col = "gray", lwd = 5.5 * size.factor)

# Column by column, draw a centred square in each cell whose side length
# equals the matrix value (zero values draw nothing visible).
for (i in seq_len(n.cols)) {
  mid.point.x <- mid.points.x[i]
  prop <- for.plotting[, i]
  xlefts <- mid.point.x - (1 / 2) * prop
  xrights <- mid.point.x + (1 / 2) * prop
  ybottoms <- mid.points.y - (1 / 2) * prop
  ytops <- mid.points.y + (1 / 2) * prop

  rect(xlefts, ybottoms, xrights, ytops, col = alpha(male.col, 0.999), border = FALSE)
}

# Fill the cells that pair a speaker with themselves with full-size gray
# squares; because the rows are reversed these lie on the anti-diagonal.
xlefts <- rev(mid.points.y) - (1 / 2)
xrights <- rev(mid.points.y) + (1 / 2)
ybottoms <- mid.points.x - (1 / 2)
ytops <- mid.points.x + (1 / 2)

rect(xlefts, ybottoms, xrights, ytops, col = "gray", border = FALSE)

# Allow drawing in the right margin and print each speaker's PageRank next to
# their row (reversed to match the reversed row order).
par(xpd = TRUE)
text(x = n.cols + 1, y = max(mid.points.x) + 1, "Influence")
text(x = n.cols + 1, y = mid.points.x, rev(round(page.rank.vec, 2)), xpd = TRUE)
dev.off()

# Draw the directed graph on a circle; a vertex is more opaque the higher its
# PageRank.
igraph_options(plot.layout = layout_in_circle)
png("plots/debate_network_example.png", 1000, 1000)
par(xpd = FALSE, mar = c(0, 0, 0, 0), cex = 3, bg = "transparent")
# The scaled PageRank is used as an alpha value, which must lie in [0, 1].
# Scores above 1 / 3.5 would exceed 1, so cap the opacity at fully opaque.
vertex.opacity <- pmin(page.rank.vec * 3.5, 1)
plot(
  mygraph.directed,
  vertex.color = alpha("gray", vertex.opacity),
  edge.arrow.size = 1,
  vertex.label.cex = 1,
  margin = c(0, 0.1, 0, 0),
  vertex.label.color = "black",
  vertex.frame.color = NA
)
dev.off()

